import scrapy


class DoubanSpider(scrapy.Spider):
    """Scrape movie title, rating and one-line quote from the Douban Top 250 list.

    The list spans 10 pages of 25 movies each, paginated via the ``start``
    query parameter (0, 25, 50, ...).
    """

    name = 'douban'
    allowed_domains = ['movie.douban.com']
    # Intentionally empty: start_requests() generates the paginated URLs itself.
    start_urls = []
    # Movies per listing page. (Was an unused 50; the pagination step on the
    # Top 250 list is 25, so the value is corrected and actually used below.)
    pageSize = 25

    def start_requests(self):
        """Yield one request per page of the Top 250 list (10 pages total)."""
        for page in range(10):
            url = f'https://movie.douban.com/top250?start={page * self.pageSize}&filter='
            self.logger.info('queueing %s', url)
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield one item dict per movie on a listing page.

        Extracts relative to each ``<li>`` node rather than as three flat
        parallel lists, so a movie with a missing field (some have no quote)
        cannot shift and misalign the data of the movies after it. Missing
        fields come back as None via ``.get()``.
        """
        for movie in response.xpath('//*[@id="content"]/div/div[1]/ol/li'):
            yield {
                # Movie title (first <span> inside the title link).
                'title': movie.xpath('div/div[2]/div[1]/a/span[1]/text()').get(),
                # Numeric rating, e.g. "9.7".
                'rating': movie.xpath('div/div[2]/div[2]/div/span[2]/text()').get(),
                # One-line promotional quote; absent for some movies.
                'quote': movie.xpath('div/div[2]/div[2]/p[2]/span/text()').get(),
            }
